{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "import scipy.sparse\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "with open('user2id.json', 'r') as f:\n",
    "    user2id = json.load(f)\n",
    "    \n",
    "with open('song2id.json', 'r') as f:\n",
    "    song2id = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "train_tp = pd.read_csv('in.train.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "uid = map(lambda x: user2id[x], train_tp['uid'])\n",
    "sid = map(lambda x: song2id[x], train_tp['sid'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "train_tp['uid'] = uid\n",
    "train_tp['sid'] = sid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uid</th>\n",
       "      <th>sid</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>235110</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>176423</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>14039</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>256592</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>144595</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>84597</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>78712</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1</td>\n",
       "      <td>38088</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2</td>\n",
       "      <td>122359</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>3</td>\n",
       "      <td>63360</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>3</td>\n",
       "      <td>108146</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>3</td>\n",
       "      <td>151870</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>3</td>\n",
       "      <td>184120</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>4</td>\n",
       "      <td>65724</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>4</td>\n",
       "      <td>208355</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>4</td>\n",
       "      <td>247144</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>4</td>\n",
       "      <td>244040</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>4</td>\n",
       "      <td>225622</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>4</td>\n",
       "      <td>14142</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>4</td>\n",
       "      <td>75307</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>4</td>\n",
       "      <td>214683</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>4</td>\n",
       "      <td>240609</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>4</td>\n",
       "      <td>90723</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>4</td>\n",
       "      <td>61866</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>4</td>\n",
       "      <td>144329</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>4</td>\n",
       "      <td>189095</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>4</td>\n",
       "      <td>194541</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>4</td>\n",
       "      <td>213741</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>5</td>\n",
       "      <td>220387</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>7</td>\n",
       "      <td>158813</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762223</th>\n",
       "      <td>546484</td>\n",
       "      <td>75731</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762224</th>\n",
       "      <td>548836</td>\n",
       "      <td>256958</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762225</th>\n",
       "      <td>549051</td>\n",
       "      <td>127386</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762226</th>\n",
       "      <td>549363</td>\n",
       "      <td>247086</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762227</th>\n",
       "      <td>550842</td>\n",
       "      <td>67193</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762228</th>\n",
       "      <td>552060</td>\n",
       "      <td>171551</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762229</th>\n",
       "      <td>553942</td>\n",
       "      <td>249315</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762230</th>\n",
       "      <td>554181</td>\n",
       "      <td>18372</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762231</th>\n",
       "      <td>554283</td>\n",
       "      <td>138650</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762232</th>\n",
       "      <td>554701</td>\n",
       "      <td>86088</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762233</th>\n",
       "      <td>555315</td>\n",
       "      <td>39964</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762234</th>\n",
       "      <td>555437</td>\n",
       "      <td>67505</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762235</th>\n",
       "      <td>555896</td>\n",
       "      <td>163195</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762236</th>\n",
       "      <td>556564</td>\n",
       "      <td>249702</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762237</th>\n",
       "      <td>557202</td>\n",
       "      <td>136270</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762238</th>\n",
       "      <td>557381</td>\n",
       "      <td>21972</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762239</th>\n",
       "      <td>557704</td>\n",
       "      <td>4517</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762240</th>\n",
       "      <td>557727</td>\n",
       "      <td>171965</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762241</th>\n",
       "      <td>557855</td>\n",
       "      <td>240145</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762242</th>\n",
       "      <td>558437</td>\n",
       "      <td>172609</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762243</th>\n",
       "      <td>558525</td>\n",
       "      <td>56060</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762244</th>\n",
       "      <td>559550</td>\n",
       "      <td>146573</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762245</th>\n",
       "      <td>560092</td>\n",
       "      <td>106778</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762246</th>\n",
       "      <td>561619</td>\n",
       "      <td>128766</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762247</th>\n",
       "      <td>562519</td>\n",
       "      <td>62040</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762248</th>\n",
       "      <td>562623</td>\n",
       "      <td>24589</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762249</th>\n",
       "      <td>562719</td>\n",
       "      <td>235646</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762250</th>\n",
       "      <td>563895</td>\n",
       "      <td>194499</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762251</th>\n",
       "      <td>563957</td>\n",
       "      <td>120751</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1762252</th>\n",
       "      <td>563957</td>\n",
       "      <td>116745</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1762253 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            uid     sid  count\n",
       "0             0  235110      1\n",
       "1             0  176423      1\n",
       "2             0   14039      1\n",
       "3             0  256592      1\n",
       "4             0  144595      1\n",
       "5             1   84597      5\n",
       "6             1   78712     11\n",
       "7             1   38088      5\n",
       "8             2  122359     10\n",
       "9             3   63360      1\n",
       "10            3  108146      1\n",
       "11            3  151870      3\n",
       "12            3  184120      1\n",
       "13            4   65724      2\n",
       "14            4  208355      2\n",
       "15            4  247144      2\n",
       "16            4  244040      1\n",
       "17            4  225622      2\n",
       "18            4   14142      1\n",
       "19            4   75307      2\n",
       "20            4  214683      2\n",
       "21            4  240609      2\n",
       "22            4   90723      1\n",
       "23            4   61866      2\n",
       "24            4  144329      2\n",
       "25            4  189095      2\n",
       "26            4  194541      2\n",
       "27            4  213741      2\n",
       "28            5  220387      7\n",
       "29            7  158813      1\n",
       "...         ...     ...    ...\n",
       "1762223  546484   75731      1\n",
       "1762224  548836  256958      1\n",
       "1762225  549051  127386      1\n",
       "1762226  549363  247086      1\n",
       "1762227  550842   67193      1\n",
       "1762228  552060  171551      1\n",
       "1762229  553942  249315      5\n",
       "1762230  554181   18372      1\n",
       "1762231  554283  138650      1\n",
       "1762232  554701   86088      1\n",
       "1762233  555315   39964      2\n",
       "1762234  555437   67505      1\n",
       "1762235  555896  163195      2\n",
       "1762236  556564  249702      5\n",
       "1762237  557202  136270      1\n",
       "1762238  557381   21972      1\n",
       "1762239  557704    4517      1\n",
       "1762240  557727  171965      1\n",
       "1762241  557855  240145      1\n",
       "1762242  558437  172609      5\n",
       "1762243  558525   56060      1\n",
       "1762244  559550  146573      1\n",
       "1762245  560092  106778      1\n",
       "1762246  561619  128766      3\n",
       "1762247  562519   62040      1\n",
       "1762248  562623   24589      2\n",
       "1762249  562719  235646      1\n",
       "1762250  563895  194499      7\n",
       "1762251  563957  120751      2\n",
       "1762252  563957  116745      2\n",
       "\n",
       "[1762253 rows x 3 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_tp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "train_tp.to_csv('in.train.num.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "test_tp = pd.read_csv('in.test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "155006\n"
     ]
    }
   ],
   "source": [
    "print len(pd.unique(test_tp['sid']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "uid = map(lambda x: user2id[x], test_tp['uid'])\n",
    "sid = map(lambda x: song2id[x], test_tp['sid'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "test_tp['uid'] = uid\n",
    "test_tp['sid'] = sid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uid</th>\n",
       "      <th>sid</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>31830</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>210172</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>182784</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>158231</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3</td>\n",
       "      <td>48021</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3</td>\n",
       "      <td>7825</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>3</td>\n",
       "      <td>44082</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>4</td>\n",
       "      <td>111631</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4</td>\n",
       "      <td>213191</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>4</td>\n",
       "      <td>84101</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>4</td>\n",
       "      <td>18108</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>6</td>\n",
       "      <td>148557</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>9</td>\n",
       "      <td>254013</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>9</td>\n",
       "      <td>74303</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>17</td>\n",
       "      <td>91550</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>17</td>\n",
       "      <td>253348</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>20</td>\n",
       "      <td>104556</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>20</td>\n",
       "      <td>79111</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>24</td>\n",
       "      <td>196803</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>24</td>\n",
       "      <td>249725</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>24</td>\n",
       "      <td>252702</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>26</td>\n",
       "      <td>80626</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>26</td>\n",
       "      <td>69553</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>26</td>\n",
       "      <td>237640</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>28</td>\n",
       "      <td>103111</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>28</td>\n",
       "      <td>116234</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>28</td>\n",
       "      <td>17439</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>28</td>\n",
       "      <td>153150</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>29</td>\n",
       "      <td>234694</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>32</td>\n",
       "      <td>28644</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410051</th>\n",
       "      <td>564379</td>\n",
       "      <td>1172</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410052</th>\n",
       "      <td>564379</td>\n",
       "      <td>78343</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410053</th>\n",
       "      <td>564380</td>\n",
       "      <td>236598</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410054</th>\n",
       "      <td>564381</td>\n",
       "      <td>76192</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410055</th>\n",
       "      <td>564383</td>\n",
       "      <td>145288</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410056</th>\n",
       "      <td>564383</td>\n",
       "      <td>18327</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410057</th>\n",
       "      <td>564383</td>\n",
       "      <td>253693</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410058</th>\n",
       "      <td>564383</td>\n",
       "      <td>180983</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410059</th>\n",
       "      <td>564385</td>\n",
       "      <td>257914</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410060</th>\n",
       "      <td>564387</td>\n",
       "      <td>207577</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410061</th>\n",
       "      <td>564388</td>\n",
       "      <td>229721</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410062</th>\n",
       "      <td>564388</td>\n",
       "      <td>179002</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410063</th>\n",
       "      <td>564392</td>\n",
       "      <td>141731</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410064</th>\n",
       "      <td>564392</td>\n",
       "      <td>184982</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410065</th>\n",
       "      <td>564395</td>\n",
       "      <td>88367</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410066</th>\n",
       "      <td>564395</td>\n",
       "      <td>85599</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410067</th>\n",
       "      <td>564395</td>\n",
       "      <td>215315</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410068</th>\n",
       "      <td>564396</td>\n",
       "      <td>253037</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410069</th>\n",
       "      <td>564401</td>\n",
       "      <td>87279</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410070</th>\n",
       "      <td>564407</td>\n",
       "      <td>119263</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410071</th>\n",
       "      <td>564407</td>\n",
       "      <td>246113</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410072</th>\n",
       "      <td>564407</td>\n",
       "      <td>69796</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410073</th>\n",
       "      <td>564407</td>\n",
       "      <td>56500</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410074</th>\n",
       "      <td>564409</td>\n",
       "      <td>143683</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410075</th>\n",
       "      <td>564413</td>\n",
       "      <td>152456</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410076</th>\n",
       "      <td>564413</td>\n",
       "      <td>139461</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410077</th>\n",
       "      <td>564425</td>\n",
       "      <td>32877</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410078</th>\n",
       "      <td>564425</td>\n",
       "      <td>47475</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410079</th>\n",
       "      <td>564432</td>\n",
       "      <td>19157</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410080</th>\n",
       "      <td>564435</td>\n",
       "      <td>109679</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>410081 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           uid     sid  count\n",
       "0            2   31830      8\n",
       "1            2  210172      2\n",
       "2            3  182784      1\n",
       "3            3  158231      1\n",
       "4            3   48021      1\n",
       "5            3    7825      1\n",
       "6            3   44082      1\n",
       "7            4  111631      5\n",
       "8            4  213191      2\n",
       "9            4   84101      1\n",
       "10           4   18108      2\n",
       "11           6  148557      1\n",
       "12           9  254013      1\n",
       "13           9   74303      1\n",
       "14          17   91550      2\n",
       "15          17  253348      1\n",
       "16          20  104556      2\n",
       "17          20   79111      2\n",
       "18          24  196803      2\n",
       "19          24  249725      1\n",
       "20          24  252702      1\n",
       "21          26   80626      2\n",
       "22          26   69553      1\n",
       "23          26  237640      1\n",
       "24          28  103111      1\n",
       "25          28  116234      1\n",
       "26          28   17439      1\n",
       "27          28  153150      3\n",
       "28          29  234694      1\n",
       "29          32   28644      2\n",
       "...        ...     ...    ...\n",
       "410051  564379    1172      2\n",
       "410052  564379   78343      1\n",
       "410053  564380  236598      6\n",
       "410054  564381   76192      1\n",
       "410055  564383  145288      1\n",
       "410056  564383   18327      1\n",
       "410057  564383  253693      6\n",
       "410058  564383  180983      1\n",
       "410059  564385  257914      1\n",
       "410060  564387  207577      2\n",
       "410061  564388  229721      1\n",
       "410062  564388  179002      2\n",
       "410063  564392  141731      1\n",
       "410064  564392  184982      1\n",
       "410065  564395   88367      1\n",
       "410066  564395   85599      1\n",
       "410067  564395  215315      1\n",
       "410068  564396  253037      1\n",
       "410069  564401   87279      7\n",
       "410070  564407  119263     43\n",
       "410071  564407  246113      1\n",
       "410072  564407   69796      1\n",
       "410073  564407   56500      2\n",
       "410074  564409  143683      1\n",
       "410075  564413  152456      1\n",
       "410076  564413  139461      1\n",
       "410077  564425   32877      1\n",
       "410078  564425   47475      1\n",
       "410079  564432   19157      2\n",
       "410080  564435  109679      7\n",
       "\n",
       "[410081 rows x 3 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_tp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "test_tp.to_csv('in.test.num.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "vad_tp = pd.read_csv('in.vad.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "98344\n"
     ]
    }
   ],
   "source": [
    "print len(pd.unique(vad_tp['sid']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "uid = map(lambda x: user2id[x], vad_tp['uid'])\n",
    "sid = map(lambda x: song2id[x], vad_tp['sid'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "vad_tp['uid'] = uid\n",
    "vad_tp['sid'] = sid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uid</th>\n",
       "      <th>sid</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>30236</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>24</td>\n",
       "      <td>83616</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>24</td>\n",
       "      <td>249741</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>25</td>\n",
       "      <td>115571</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>26</td>\n",
       "      <td>51973</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>29</td>\n",
       "      <td>60242</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>32</td>\n",
       "      <td>65626</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>34</td>\n",
       "      <td>109581</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>39</td>\n",
       "      <td>104679</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>41</td>\n",
       "      <td>172822</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>44</td>\n",
       "      <td>212439</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>47</td>\n",
       "      <td>113561</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>49</td>\n",
       "      <td>251199</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>50</td>\n",
       "      <td>9349</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>52</td>\n",
       "      <td>28295</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>53</td>\n",
       "      <td>110911</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>57</td>\n",
       "      <td>190629</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>58</td>\n",
       "      <td>215632</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>61</td>\n",
       "      <td>190384</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>64</td>\n",
       "      <td>94911</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>68</td>\n",
       "      <td>95426</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>74</td>\n",
       "      <td>130087</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>76</td>\n",
       "      <td>148630</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>77</td>\n",
       "      <td>52916</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>82</td>\n",
       "      <td>199421</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>82</td>\n",
       "      <td>222116</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>82</td>\n",
       "      <td>18124</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>94</td>\n",
       "      <td>218229</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>94</td>\n",
       "      <td>153550</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>109</td>\n",
       "      <td>253721</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162497</th>\n",
       "      <td>509349</td>\n",
       "      <td>77426</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162498</th>\n",
       "      <td>510279</td>\n",
       "      <td>83346</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162499</th>\n",
       "      <td>513173</td>\n",
       "      <td>30397</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162500</th>\n",
       "      <td>515502</td>\n",
       "      <td>251888</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162501</th>\n",
       "      <td>515680</td>\n",
       "      <td>6862</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162502</th>\n",
       "      <td>517203</td>\n",
       "      <td>200720</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162503</th>\n",
       "      <td>520692</td>\n",
       "      <td>214912</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162504</th>\n",
       "      <td>521701</td>\n",
       "      <td>184298</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162505</th>\n",
       "      <td>523400</td>\n",
       "      <td>152964</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162506</th>\n",
       "      <td>525144</td>\n",
       "      <td>222545</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162507</th>\n",
       "      <td>527649</td>\n",
       "      <td>181417</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162508</th>\n",
       "      <td>528254</td>\n",
       "      <td>77881</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162509</th>\n",
       "      <td>528482</td>\n",
       "      <td>145415</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162510</th>\n",
       "      <td>531173</td>\n",
       "      <td>235268</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162511</th>\n",
       "      <td>534328</td>\n",
       "      <td>201650</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162512</th>\n",
       "      <td>536518</td>\n",
       "      <td>36494</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162513</th>\n",
       "      <td>537108</td>\n",
       "      <td>68975</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162514</th>\n",
       "      <td>540174</td>\n",
       "      <td>99626</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162515</th>\n",
       "      <td>541867</td>\n",
       "      <td>88201</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162516</th>\n",
       "      <td>543408</td>\n",
       "      <td>229661</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162517</th>\n",
       "      <td>548231</td>\n",
       "      <td>131175</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162518</th>\n",
       "      <td>548788</td>\n",
       "      <td>208027</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162519</th>\n",
       "      <td>550608</td>\n",
       "      <td>49050</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162520</th>\n",
       "      <td>553301</td>\n",
       "      <td>163583</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162521</th>\n",
       "      <td>554209</td>\n",
       "      <td>206248</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162522</th>\n",
       "      <td>554329</td>\n",
       "      <td>190735</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162523</th>\n",
       "      <td>555258</td>\n",
       "      <td>163640</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162524</th>\n",
       "      <td>560257</td>\n",
       "      <td>225023</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162525</th>\n",
       "      <td>561400</td>\n",
       "      <td>108387</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162526</th>\n",
       "      <td>561619</td>\n",
       "      <td>126924</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>162527 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           uid     sid  count\n",
       "0            4   30236      2\n",
       "1           24   83616      1\n",
       "2           24  249741      1\n",
       "3           25  115571      1\n",
       "4           26   51973      1\n",
       "5           29   60242      5\n",
       "6           32   65626      1\n",
       "7           34  109581     11\n",
       "8           39  104679      1\n",
       "9           41  172822      8\n",
       "10          44  212439      1\n",
       "11          47  113561      4\n",
       "12          49  251199      1\n",
       "13          50    9349      1\n",
       "14          52   28295      1\n",
       "15          53  110911      3\n",
       "16          57  190629      1\n",
       "17          58  215632      6\n",
       "18          61  190384      4\n",
       "19          64   94911      1\n",
       "20          68   95426      3\n",
       "21          74  130087      1\n",
       "22          76  148630      1\n",
       "23          77   52916      1\n",
       "24          82  199421      1\n",
       "25          82  222116      1\n",
       "26          82   18124      1\n",
       "27          94  218229      1\n",
       "28          94  153550      3\n",
       "29         109  253721      4\n",
       "...        ...     ...    ...\n",
       "162497  509349   77426      3\n",
       "162498  510279   83346      1\n",
       "162499  513173   30397      2\n",
       "162500  515502  251888      1\n",
       "162501  515680    6862      1\n",
       "162502  517203  200720      1\n",
       "162503  520692  214912      1\n",
       "162504  521701  184298      1\n",
       "162505  523400  152964      1\n",
       "162506  525144  222545      1\n",
       "162507  527649  181417      2\n",
       "162508  528254   77881      1\n",
       "162509  528482  145415      5\n",
       "162510  531173  235268      1\n",
       "162511  534328  201650      1\n",
       "162512  536518   36494      6\n",
       "162513  537108   68975      1\n",
       "162514  540174   99626      1\n",
       "162515  541867   88201      2\n",
       "162516  543408  229661      2\n",
       "162517  548231  131175      1\n",
       "162518  548788  208027      1\n",
       "162519  550608   49050      3\n",
       "162520  553301  163583      1\n",
       "162521  554209  206248      2\n",
       "162522  554329  190735      1\n",
       "162523  555258  163640     28\n",
       "162524  560257  225023      8\n",
       "162525  561400  108387      1\n",
       "162526  561619  126924      4\n",
       "\n",
       "[162527 rows x 3 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vad_tp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Write vad_tp (its uid/sid columns now hold the looked-up ids) to 'in.vad.num.csv';\n",
    "# index=False keeps the row index out of the file.\n",
    "vad_tp.to_csv('in.vad.num.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Load 'out.test.csv' into out_tp; the following cells read its 'uid' and 'sid' columns.\n",
    "out_tp = pd.read_csv('out.test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Look up every 'uid' / 'sid' value of out_tp in user2id / song2id.\n",
    "# list(...) materialises the result on both Python 2 and 3 (a bare map() is a lazy,\n",
    "# one-shot iterator on Python 3); the generator form keeps the loop variable out of\n",
    "# the notebook namespace on Python 2. A value missing from the dict raises KeyError.\n",
    "uid = list(user2id[key] for key in out_tp['uid'])\n",
    "sid = list(song2id[key] for key in out_tp['sid'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Replace the 'uid' and 'sid' columns of out_tp with the looked-up ids from the previous cell.\n",
    "# NOTE: this mutates out_tp in place; reload the CSV before re-running the lookup cell.\n",
    "out_tp['uid'], out_tp['sid'] = uid, sid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uid</th>\n",
       "      <th>sid</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0      </th>\n",
       "      <td>      0</td>\n",
       "      <td> 94610</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1      </th>\n",
       "      <td>      0</td>\n",
       "      <td> 92713</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2      </th>\n",
       "      <td>      0</td>\n",
       "      <td> 94485</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3      </th>\n",
       "      <td>      1</td>\n",
       "      <td> 96234</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4      </th>\n",
       "      <td>      1</td>\n",
       "      <td> 94802</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5      </th>\n",
       "      <td>      1</td>\n",
       "      <td> 93741</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6      </th>\n",
       "      <td>      2</td>\n",
       "      <td> 96220</td>\n",
       "      <td>  6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7      </th>\n",
       "      <td>      4</td>\n",
       "      <td> 93682</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8      </th>\n",
       "      <td>      4</td>\n",
       "      <td> 97181</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9      </th>\n",
       "      <td>      4</td>\n",
       "      <td> 96955</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10     </th>\n",
       "      <td>      4</td>\n",
       "      <td> 94430</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11     </th>\n",
       "      <td>      4</td>\n",
       "      <td> 94233</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12     </th>\n",
       "      <td>      4</td>\n",
       "      <td> 93311</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13     </th>\n",
       "      <td>      4</td>\n",
       "      <td> 93213</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14     </th>\n",
       "      <td>      4</td>\n",
       "      <td> 96100</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15     </th>\n",
       "      <td>      5</td>\n",
       "      <td> 93036</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16     </th>\n",
       "      <td>      5</td>\n",
       "      <td> 94813</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17     </th>\n",
       "      <td>      5</td>\n",
       "      <td> 96189</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18     </th>\n",
       "      <td>      5</td>\n",
       "      <td> 95814</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19     </th>\n",
       "      <td>      6</td>\n",
       "      <td> 94706</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20     </th>\n",
       "      <td>      7</td>\n",
       "      <td> 96325</td>\n",
       "      <td> 12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21     </th>\n",
       "      <td>      7</td>\n",
       "      <td> 93082</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22     </th>\n",
       "      <td>      7</td>\n",
       "      <td> 97056</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23     </th>\n",
       "      <td>      7</td>\n",
       "      <td> 96665</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24     </th>\n",
       "      <td>      7</td>\n",
       "      <td> 95739</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25     </th>\n",
       "      <td>      7</td>\n",
       "      <td> 96053</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26     </th>\n",
       "      <td>      7</td>\n",
       "      <td> 95114</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27     </th>\n",
       "      <td>      7</td>\n",
       "      <td> 96477</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28     </th>\n",
       "      <td>      7</td>\n",
       "      <td> 93233</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29     </th>\n",
       "      <td>      7</td>\n",
       "      <td> 95963</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922083</th>\n",
       "      <td> 613673</td>\n",
       "      <td> 97048</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922084</th>\n",
       "      <td> 613673</td>\n",
       "      <td> 97404</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922085</th>\n",
       "      <td> 613673</td>\n",
       "      <td> 94407</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922086</th>\n",
       "      <td> 613673</td>\n",
       "      <td> 97000</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922087</th>\n",
       "      <td> 613675</td>\n",
       "      <td> 93455</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922088</th>\n",
       "      <td> 613675</td>\n",
       "      <td> 95145</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922089</th>\n",
       "      <td> 613676</td>\n",
       "      <td> 96474</td>\n",
       "      <td> 13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922090</th>\n",
       "      <td> 613678</td>\n",
       "      <td> 93571</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922091</th>\n",
       "      <td> 613678</td>\n",
       "      <td> 96099</td>\n",
       "      <td> 11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922092</th>\n",
       "      <td> 613678</td>\n",
       "      <td> 94862</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922093</th>\n",
       "      <td> 613678</td>\n",
       "      <td> 94185</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922094</th>\n",
       "      <td> 613678</td>\n",
       "      <td> 93738</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922095</th>\n",
       "      <td> 613678</td>\n",
       "      <td> 96336</td>\n",
       "      <td> 14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922096</th>\n",
       "      <td> 613678</td>\n",
       "      <td> 92925</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922097</th>\n",
       "      <td> 613678</td>\n",
       "      <td> 95983</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922098</th>\n",
       "      <td> 613678</td>\n",
       "      <td> 93451</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922099</th>\n",
       "      <td> 613678</td>\n",
       "      <td> 96126</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922100</th>\n",
       "      <td> 613678</td>\n",
       "      <td> 95383</td>\n",
       "      <td> 16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922101</th>\n",
       "      <td> 613679</td>\n",
       "      <td> 92830</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922102</th>\n",
       "      <td> 613679</td>\n",
       "      <td> 96955</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922103</th>\n",
       "      <td> 613679</td>\n",
       "      <td> 96267</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922104</th>\n",
       "      <td> 613679</td>\n",
       "      <td> 96099</td>\n",
       "      <td>  6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922105</th>\n",
       "      <td> 613679</td>\n",
       "      <td> 94096</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922106</th>\n",
       "      <td> 613679</td>\n",
       "      <td> 95041</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922107</th>\n",
       "      <td> 613679</td>\n",
       "      <td> 96060</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922108</th>\n",
       "      <td> 613679</td>\n",
       "      <td> 94402</td>\n",
       "      <td>  5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922109</th>\n",
       "      <td> 613679</td>\n",
       "      <td> 97358</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922110</th>\n",
       "      <td> 613679</td>\n",
       "      <td> 96551</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922111</th>\n",
       "      <td> 613681</td>\n",
       "      <td> 93449</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922112</th>\n",
       "      <td> 613681</td>\n",
       "      <td> 96872</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1922113 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            uid    sid  count\n",
       "0             0  94610      1\n",
       "1             0  92713      1\n",
       "2             0  94485      1\n",
       "3             1  96234      1\n",
       "4             1  94802      3\n",
       "5             1  93741      2\n",
       "6             2  96220      6\n",
       "7             4  93682      2\n",
       "8             4  97181      2\n",
       "9             4  96955      3\n",
       "10            4  94430      2\n",
       "11            4  94233      1\n",
       "12            4  93311      1\n",
       "13            4  93213      1\n",
       "14            4  96100      1\n",
       "15            5  93036      1\n",
       "16            5  94813      2\n",
       "17            5  96189      1\n",
       "18            5  95814      2\n",
       "19            6  94706      1\n",
       "20            7  96325     12\n",
       "21            7  93082      1\n",
       "22            7  97056      3\n",
       "23            7  96665      2\n",
       "24            7  95739      1\n",
       "25            7  96053      1\n",
       "26            7  95114      1\n",
       "27            7  96477      1\n",
       "28            7  93233      1\n",
       "29            7  95963      2\n",
       "...         ...    ...    ...\n",
       "1922083  613673  97048      1\n",
       "1922084  613673  97404      1\n",
       "1922085  613673  94407      3\n",
       "1922086  613673  97000      1\n",
       "1922087  613675  93455      1\n",
       "1922088  613675  95145      1\n",
       "1922089  613676  96474     13\n",
       "1922090  613678  93571      1\n",
       "1922091  613678  96099     11\n",
       "1922092  613678  94862      2\n",
       "1922093  613678  94185      2\n",
       "1922094  613678  93738      1\n",
       "1922095  613678  96336     14\n",
       "1922096  613678  92925      1\n",
       "1922097  613678  95983      2\n",
       "1922098  613678  93451      2\n",
       "1922099  613678  96126      2\n",
       "1922100  613678  95383     16\n",
       "1922101  613679  92830      2\n",
       "1922102  613679  96955      1\n",
       "1922103  613679  96267      1\n",
       "1922104  613679  96099      6\n",
       "1922105  613679  94096      3\n",
       "1922106  613679  95041      1\n",
       "1922107  613679  96060      3\n",
       "1922108  613679  94402      5\n",
       "1922109  613679  97358      1\n",
       "1922110  613679  96551      1\n",
       "1922111  613681  93449      2\n",
       "1922112  613681  96872      1\n",
       "\n",
       "[1922113 rows x 3 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_tp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Write out_tp (its uid/sid columns now hold the looked-up ids) to 'out.test.num.csv';\n",
    "# index=False keeps the row index out of the file.\n",
    "out_tp.to_csv('out.test.num.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
